#pragma once 

#include <time.h>
#include <stdlib.h>
#include <stdio.h>

#include "parser.h"
#include "utils.h"
#include "cuda.h"
#include "blas.h"
#include "connected_layer.h"

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#endif

	extern void predict_classifier( char *datacfg, char *cfgfile, char *weightfile, char *filename, int top );
	extern void test_detector( char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh );
	extern void run_voxel( int argc, char **argv );
	extern void run_yolo( int argc, char **argv );
	extern void run_detector( int argc, char **argv );
	extern void run_coco( int argc, char **argv );
	extern void run_writing( int argc, char **argv );
	extern void run_captcha( int argc, char **argv );
	extern void run_nightmare( int argc, char **argv );
	extern void run_dice( int argc, char **argv );
	extern void run_compare( int argc, char **argv );
	extern void run_classifier( int argc, char **argv );
	extern void run_char_rnn( int argc, char **argv );
	extern void run_vid_rnn( int argc, char **argv );
	extern void run_tag( int argc, char **argv );
	extern void run_cifar( int argc, char **argv );
	extern void run_go( int argc, char **argv );
	extern void run_art( int argc, char **argv );
	extern void run_super( int argc, char **argv );

	void average( int argc, char *argv[] )
	{
		char *cfgfile = argv[ 2 ];
		char *outfile = argv[ 3 ];
		gpu_index = -1;
		network net = parse_network_cfg( cfgfile );
		network sum = parse_network_cfg( cfgfile );

		char *weightfile = argv[ 4 ];
		load_weights( &sum, weightfile );

		int i, j;
		int n = argc - 5;
		for( i = 0; i < n; ++i ) {
			weightfile = argv[ i + 5 ];
			load_weights( &net, weightfile );
			for( j = 0; j < net.n; ++j ) {
				layer l = net.layers[ j ];
				layer out = sum.layers[ j ];
				if( l.type == CONVOLUTIONAL ) {
					int num = l.n*l.c*l.size*l.size;
					axpy_cpu( l.n, 1, l.biases, 1, out.biases, 1 );
					axpy_cpu( num, 1, l.weights, 1, out.weights, 1 );
					if( l.batch_normalize ) {
						axpy_cpu( l.n, 1, l.scales, 1, out.scales, 1 );
						axpy_cpu( l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1 );
						axpy_cpu( l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1 );
					}
				}
				if( l.type == CONNECTED ) {
					axpy_cpu( l.outputs, 1, l.biases, 1, out.biases, 1 );
					axpy_cpu( l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1 );
				}
			}
		}
		n = n + 1;
		for( j = 0; j < net.n; ++j ) {
			layer l = sum.layers[ j ];
			if( l.type == CONVOLUTIONAL ) {
				int num = l.n*l.c*l.size*l.size;
				scal_cpu( l.n, 1. / n, l.biases, 1 );
				scal_cpu( num, 1. / n, l.weights, 1 );
				if( l.batch_normalize ) {
					scal_cpu( l.n, 1. / n, l.scales, 1 );
					scal_cpu( l.n, 1. / n, l.rolling_mean, 1 );
					scal_cpu( l.n, 1. / n, l.rolling_variance, 1 );
				}
			}
			if( l.type == CONNECTED ) {
				scal_cpu( l.outputs, 1. / n, l.biases, 1 );
				scal_cpu( l.outputs*l.inputs, 1. / n, l.weights, 1 );
			}
		}
		save_weights( sum, outfile );
	}

	void speed( char *cfgfile, int tics )
	{
		if( tics == 0 ) tics = 1000;
		network net = parse_network_cfg( cfgfile );
		set_batch_network( &net, 1 );
		int i;
		time_t start = time( 0 );
		image im = make_image( net.w, net.h, net.c );
		for( i = 0; i < tics; ++i ) {
			network_predict( net, im.data1 );
		}
		double t = difftime( time( 0 ), start );
		printf( "\n%d evals, %f Seconds\n", tics, t );
		printf( "Speed: %f sec/eval\n", t / tics );
		printf( "Speed: %f Hz\n", tics / t );
	}

	void operations( char *cfgfile )
	{
		gpu_index = -1;
		network net = parse_network_cfg( cfgfile );
		int i;
		long ops = 0;
		for( i = 0; i < net.n; ++i ) {
			layer l = net.layers[ i ];
			if( l.type == CONVOLUTIONAL ) {
				ops += 2l * l.n * l.size*l.size*l.c * l.out_h*l.out_w;
			}
			else if( l.type == CONNECTED ) {
				ops += 2l * l.inputs * l.outputs;
			}
		}
		printf( "Floating Point Operations: %ld\n", ops );
		printf( "Floating Point Operations: %.2f Bn\n", ( float ) ops / 1000000000. );
	}

	void oneoff( char *cfgfile, char *weightfile, char *outfile )
	{
		gpu_index = -1;
		network net = parse_network_cfg( cfgfile );
		int oldn = net.layers[ net.n - 2 ].n;
		int c = net.layers[ net.n - 2 ].c;
		net.layers[ net.n - 2 ].n = 9372;
		net.layers[ net.n - 2 ].biases += 5;
		net.layers[ net.n - 2 ].weights += 5 * c;
		if( weightfile ) {
			load_weights( &net, weightfile );
		}
		net.layers[ net.n - 2 ].biases -= 5;
		net.layers[ net.n - 2 ].weights -= 5 * c;
		net.layers[ net.n - 2 ].n = oldn;
		printf( "%d\n", oldn );
		layer l = net.layers[ net.n - 2 ];
		copy_cpu( l.n / 3, l.biases, 1, l.biases + l.n / 3, 1 );
		copy_cpu( l.n / 3, l.biases, 1, l.biases + 2 * l.n / 3, 1 );
		copy_cpu( l.n / 3 * l.c, l.weights, 1, l.weights + l.n / 3 * l.c, 1 );
		copy_cpu( l.n / 3 * l.c, l.weights, 1, l.weights + 2 * l.n / 3 * l.c, 1 );
		*net.seen = 0;
		save_weights( net, outfile );
	}

	void partial( char *cfgfile, char *weightfile, char *outfile, int max )
	{
		gpu_index = -1;
		network net = parse_network_cfg( cfgfile );
		if( weightfile ) {
			load_weights_upto( &net, weightfile, max );
		}
		*net.seen = 0;
		save_weights_upto( net, outfile, max );
	}

#include "convolutional_layer.h"
	void rescale_net( char *cfgfile, char *weightfile, char *outfile )
	{
		gpu_index = -1;
		network net = parse_network_cfg( cfgfile );
		if( weightfile ) {
			load_weights( &net, weightfile );
		}
		int i;
		for( i = 0; i < net.n; ++i ) {
			layer l = net.layers[ i ];
			if( l.type == CONVOLUTIONAL ) {
				rescale_weights( l, 2, -.5 );
				break;
			}
		}
		save_weights( net, outfile );
	}

	void rgbgr_net( char *cfgfile, char *weightfile, char *outfile )
	{
		gpu_index = -1;
		network net = parse_network_cfg( cfgfile );
		if( weightfile ) {
			load_weights( &net, weightfile );
		}
		int i;
		for( i = 0; i < net.n; ++i ) {
			layer l = net.layers[ i ];
			if( l.type == CONVOLUTIONAL ) {
				rgbgr_weights( l );
				break;
			}
		}
		save_weights( net, outfile );
	}

	void reset_normalize_net( char *cfgfile, char *weightfile, char *outfile )
	{
		gpu_index = -1;
		network net = parse_network_cfg( cfgfile );
		if( weightfile ) {
			load_weights( &net, weightfile );
		}
		int i;
		for( i = 0; i < net.n; ++i ) {
			layer l = net.layers[ i ];
			if( l.type == CONVOLUTIONAL && l.batch_normalize ) {
				denormalize_convolutional_layer( l );
			}
			if( l.type == CONNECTED && l.batch_normalize ) {
				denormalize_connected_layer( l );
			}
			if( l.type == GRU && l.batch_normalize ) {
				denormalize_connected_layer( *l.input_z_layer );
				denormalize_connected_layer( *l.input_r_layer );
				denormalize_connected_layer( *l.input_h_layer );
				denormalize_connected_layer( *l.state_z_layer );
				denormalize_connected_layer( *l.state_r_layer );
				denormalize_connected_layer( *l.state_h_layer );
			}
		}
		save_weights( net, outfile );
	}

	layer normalize_layer( layer l, int n )
	{
		int j;
		l.batch_normalize = 1;
		l.scales = ( float* ) calloc( n, sizeof( float ) );
		for( j = 0; j < n; ++j ) {
			l.scales[ j ] = 1;
		}
		l.rolling_mean = ( float* ) calloc( n, sizeof( float ) );
		l.rolling_variance = ( float* ) calloc( n, sizeof( float ) );
		return l;
	}

	void normalize_net( char *cfgfile, char *weightfile, char *outfile )
	{
		gpu_index = -1;
		network net = parse_network_cfg( cfgfile );
		if( weightfile ) {
			load_weights( &net, weightfile );
		}
		int i;
		for( i = 0; i < net.n; ++i ) {
			layer l = net.layers[ i ];
			if( l.type == CONVOLUTIONAL && !l.batch_normalize ) {
				net.layers[ i ] = normalize_layer( l, l.n );
			}
			if( l.type == CONNECTED && !l.batch_normalize ) {
				net.layers[ i ] = normalize_layer( l, l.outputs );
			}
			if( l.type == GRU && l.batch_normalize ) {
				*l.input_z_layer = normalize_layer( *l.input_z_layer, l.input_z_layer->outputs );
				*l.input_r_layer = normalize_layer( *l.input_r_layer, l.input_r_layer->outputs );
				*l.input_h_layer = normalize_layer( *l.input_h_layer, l.input_h_layer->outputs );
				*l.state_z_layer = normalize_layer( *l.state_z_layer, l.state_z_layer->outputs );
				*l.state_r_layer = normalize_layer( *l.state_r_layer, l.state_r_layer->outputs );
				*l.state_h_layer = normalize_layer( *l.state_h_layer, l.state_h_layer->outputs );
				net.layers[ i ].batch_normalize = 1;
			}
		}
		save_weights( net, outfile );
	}

	void statistics_net( char *cfgfile, char *weightfile )
	{
		gpu_index = -1;
		network net = parse_network_cfg( cfgfile );
		if( weightfile ) {
			load_weights( &net, weightfile );
		}
		int i;
		for( i = 0; i < net.n; ++i ) {
			layer l = net.layers[ i ];
			if( l.type == CONNECTED && l.batch_normalize ) {
				printf( "Connected Layer %d\n", i );
				statistics_connected_layer( l );
			}
			if( l.type == GRU && l.batch_normalize ) {
				printf( "GRU Layer %d\n", i );
				printf( "Input Z\n" );
				statistics_connected_layer( *l.input_z_layer );
				printf( "Input R\n" );
				statistics_connected_layer( *l.input_r_layer );
				printf( "Input H\n" );
				statistics_connected_layer( *l.input_h_layer );
				printf( "State Z\n" );
				statistics_connected_layer( *l.state_z_layer );
				printf( "State R\n" );
				statistics_connected_layer( *l.state_r_layer );
				printf( "State H\n" );
				statistics_connected_layer( *l.state_h_layer );
			}
			printf( "\n" );
		}
	}

	void denormalize_net( char *cfgfile, char *weightfile, char *outfile )
	{
		gpu_index = -1;
		network net = parse_network_cfg( cfgfile );
		if( weightfile ) {
			load_weights( &net, weightfile );
		}
		int i;
		for( i = 0; i < net.n; ++i ) {
			layer l = net.layers[ i ];
			if( l.type == CONVOLUTIONAL && l.batch_normalize ) {
				denormalize_convolutional_layer( l );
				net.layers[ i ].batch_normalize = 0;
			}
			if( l.type == CONNECTED && l.batch_normalize ) {
				denormalize_connected_layer( l );
				net.layers[ i ].batch_normalize = 0;
			}
			if( l.type == GRU && l.batch_normalize ) {
				denormalize_connected_layer( *l.input_z_layer );
				denormalize_connected_layer( *l.input_r_layer );
				denormalize_connected_layer( *l.input_h_layer );
				denormalize_connected_layer( *l.state_z_layer );
				denormalize_connected_layer( *l.state_r_layer );
				denormalize_connected_layer( *l.state_h_layer );
				l.input_z_layer->batch_normalize = 0;
				l.input_r_layer->batch_normalize = 0;
				l.input_h_layer->batch_normalize = 0;
				l.state_z_layer->batch_normalize = 0;
				l.state_r_layer->batch_normalize = 0;
				l.state_h_layer->batch_normalize = 0;
				net.layers[ i ].batch_normalize = 0;
			}
		}
		save_weights( net, outfile );
	}

	void visualize( char *cfgfile, char *weightfile )
	{
		network net = parse_network_cfg( cfgfile );
		if( weightfile ) {
			load_weights( &net, weightfile );
		}
		visualize_network( net );
#ifdef OPENCV
		cvWaitKey( 0 );
#endif
	}
	/*
	int main(int argc, char **argv)
	{
		//test_resize("data/bad.jpg");
		//test_box();
		//test_convolutional_layer();
		if(argc < 2){
			fprintf(stderr, "usage: %s <function>\n", argv[0]);
			return 0;
		}
		gpu_index = find_int_arg(argc, argv, "-i", 0);
		if(find_arg(argc, argv, "-nogpu")) {
			gpu_index = -1;
		}

	#ifndef GPU
		gpu_index = -1;
	#else
		if(gpu_index >= 0){
			cuda_set_device(gpu_index);
		}
	#endif

		if (0 == strcmp(argv[1], "average")){
			average(argc, argv);
		} else if (0 == strcmp(argv[1], "yolo")){
			run_yolo(argc, argv);
		} else if (0 == strcmp(argv[1], "voxel")){
			run_voxel(argc, argv);
		} else if (0 == strcmp(argv[1], "super")){
			run_super(argc, argv);
		} else if (0 == strcmp(argv[1], "detector")){
			run_detector(argc, argv);
		} else if (0 == strcmp(argv[1], "detect")){
			float thresh = find_float_arg(argc, argv, "-thresh", .24);
			char *filename = (argc > 4) ? argv[4]: 0;
			test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh);
		} else if (0 == strcmp(argv[1], "cifar")){
			run_cifar(argc, argv);
		} else if (0 == strcmp(argv[1], "go")){
			run_go(argc, argv);
		} else if (0 == strcmp(argv[1], "rnn")){
			run_char_rnn(argc, argv);
		} else if (0 == strcmp(argv[1], "vid")){
			run_vid_rnn(argc, argv);
		} else if (0 == strcmp(argv[1], "coco")){
			run_coco(argc, argv);
		} else if (0 == strcmp(argv[1], "classify")){
			predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
		} else if (0 == strcmp(argv[1], "classifier")){
			run_classifier(argc, argv);
		} else if (0 == strcmp(argv[1], "art")){
			run_art(argc, argv);
		} else if (0 == strcmp(argv[1], "tag")){
			run_tag(argc, argv);
		} else if (0 == strcmp(argv[1], "compare")){
			run_compare(argc, argv);
		} else if (0 == strcmp(argv[1], "dice")){
			run_dice(argc, argv);
		} else if (0 == strcmp(argv[1], "writing")){
			run_writing(argc, argv);
		} else if (0 == strcmp(argv[1], "3d")){
			composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
		} else if (0 == strcmp(argv[1], "test")){
			test_resize(argv[2]);
		} else if (0 == strcmp(argv[1], "captcha")){
			run_captcha(argc, argv);
		} else if (0 == strcmp(argv[1], "nightmare")){
			run_nightmare(argc, argv);
		} else if (0 == strcmp(argv[1], "rgbgr")){
			rgbgr_net(argv[2], argv[3], argv[4]);
		} else if (0 == strcmp(argv[1], "reset")){
			reset_normalize_net(argv[2], argv[3], argv[4]);
		} else if (0 == strcmp(argv[1], "denormalize")){
			denormalize_net(argv[2], argv[3], argv[4]);
		} else if (0 == strcmp(argv[1], "statistics")){
			statistics_net(argv[2], argv[3]);
		} else if (0 == strcmp(argv[1], "normalize")){
			normalize_net(argv[2], argv[3], argv[4]);
		} else if (0 == strcmp(argv[1], "rescale")){
			rescale_net(argv[2], argv[3], argv[4]);
		} else if (0 == strcmp(argv[1], "ops")){
			operations(argv[2]);
		} else if (0 == strcmp(argv[1], "speed")){
			speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
		} else if (0 == strcmp(argv[1], "oneoff")){
			oneoff(argv[2], argv[3], argv[4]);
		} else if (0 == strcmp(argv[1], "partial")){
			partial(argv[2], argv[3], argv[4], atoi(argv[5]));
		} else if (0 == strcmp(argv[1], "average")){
			average(argc, argv);
		} else if (0 == strcmp(argv[1], "visualize")){
			visualize(argv[2], (argc > 3) ? argv[3] : 0);
		} else if (0 == strcmp(argv[1], "imtest")){
			test_resize(argv[2]);
		} else {
			fprintf(stderr, "Not an option: %s\n", argv[1]);
		}
		return 0;
	}
	*/
